# !diagnostics off
library(readr)
library(magrittr)
library(sandwich)
library(xtable)
library(ggplot2)
library(lmtest)
library(haven)
library(fixest)
library(Hmisc)
library(stargazer)
library(vtable)
library(conleyreg)
library(plm)
library(dplyr)
library(rdrobust)
library(descr)
library(ggthemes)
library(interplot)
library(mediation)
library(readxl)
library(broom)
library(tidyverse)
library(hrbrthemes)
library(logistf) 
library(gridExtra)
library(dotwhisker)
library(sf)
library(spdep)
library(spatialreg)
library(splm)
library(geosphere)
library(broom.mixed)
library(ggpubr)
library(jtools)
library(tigris)
library(lfe)
library(devtools)
library(survival)
library(ggsurvfit)
library(ggfortify)
library(imputeTS)


### must run zero policy options twice; otherwise won't work 
spdep::set.ZeroPolicyOption(TRUE)
spdep::set.ZeroPolicyOption(TRUE)
spatialreg::set.ZeroPolicyOption(TRUE)
spatialreg::set.ZeroPolicyOption(TRUE)


 
######## Set working directory to this folder ######
# setwd(getwd())


######## read in data ##############
base <- read_csv("gridjoined.csv") 
dim(base) #7544
 
##expand base data 
base$gridid <- base$OBJECTID
base= base %>% mutate(
  dispute= ifelse(is.na(claim)==T, 0 , 1 )
)


######## resources #######
### coding only non-time varying resources
### time varying resources will be coded later 
 

## crops 
base$ag %>% table (useNA= 'ifany')
base= base %>% mutate(
  cropland = ifelse (is.na (ag) == T, 0 ,1)
)
base$cropland %>% table (useNA = 'ifany')
# 508 7036 

## ocean
base$ocean %>% table (useNA = 'ifany')
base= base %>% mutate(
  oceantile = ifelse (is.na (ocean) == T, 0 ,1)
)
base$oceantile %>% table (useNA = 'ifany')
# 6058 1486 

## forestry 
base$forest  %>% table (useNA = 'ifany')
# 722 6822 

##fish and forestry combined
base = base %>% mutate(
  forfish= ifelse(forest==1 | oceantile==1, 1, 0)
  
)
base$forfish %>% table (useNA = 'ifany')
#  161 7383 



################## Controls ########################
## jurisdiction overlap

base= base %>% mutate(
  overlap = ifelse (is.na (Overlap_count) == T, 0 ,1),
  nocol= ifelse (is.na (nocol) == T, 0 ,1),
  uoa= ifelse(overlap==1 | nocol==1, 1, 0)
)
  

# border
base$bordert<- base$border
base= base %>% mutate(
  border = ifelse (is.na (bordert) == T, 0 ,1),
  uoabord= ifelse(uoa==1 | border==1, 1, 0)
)
base$uoabord %>% table (useNA = 'ifany')
# 0    1 
# 3405 4139 
base$border %>% table
# 0    1 
# 4814 2730 

### data with only high risk gridcells 
uoabord <- base %>% filter(uoabord==1)
 


#------------------------------------------
######### expand data frames #######
#------------------------------------------


#### call in data with territorial claim information 
dupdat <- read_csv("gridjoined_yr.csv")
# Rows: 10094 Columns: 48    
# this data is multi-joined because one gridcell can be subject to multiple claims at the same time
 
dupdat = dupdat %>% mutate(
  begclaim = as.numeric(begclaim),
  endclaim= as.numeric(endclaim) )
 


#### expanding grid data 

## set up basic panel structure 
time= seq(1830, 2001, 1)
reprow= rep(rownames(dupdat), length(time)) %>% as.numeric()
dupdat_exp= dupdat[reprow, ]

 
## duplicate mark (not by grid but by how many there are)
dupdat_exp= dupdat_exp %>% arrange(OBJECTID) %>%  group_by(OBJECTID) %>% 
  mutate(dup= 1:n())

#transform to year
dupdat_exp= dupdat_exp %>% mutate(
  year= 1830+ dup -1) %>% arrange(gridid, year)

#### populate expanded data (dupdat) #####
## put in multiple disputes accordingly 
dupdat_exp= dupdat_exp %>% group_by(gridid) %>% 
  mutate(
    start= ifelse(begclaim== year, 1, 0),
    end= ifelse(endclaim== year, 1, 0),
    check= ifelse(year>=begclaim & year <= endclaim, 1, 0),
    check= replace_na(check, 0))

#code a gridcell as disputed if it was involved in any dispute 
dupdat_exp= dupdat_exp %>% group_by(gridid, year) %>% 
  mutate(disputed = max(check))  
 
dupdat_exp$disputed %>% table (useNA = 'ifany')
#1362857  373311 
  


###### filter disputes based on claimed area  #######

## call in data that has information on gridcell landmass area 
landarea <- read_csv("landarea.csv")
#drop ocean mass
landarea <- landarea %>% filter(FID_southamerica==1)  



## call in data that has info on disputed area for each gridcell 
union <- read_csv("uniondata2.csv")
 
 
#add in landmass of gridcell into union data
union$landmass <- landarea$landmass[match(union$Grid, landarea$Grid)]
 

claimed= union %>% filter(claim!=0) 
claimed= claimed %>% group_by(claimre, Grid) %>% mutate(
  percent= overlap_area/landmass, #percent of claim overlapping with gridcell
  sumoverlap= sum(percent), #add fragmented polygon overlaps for each unique claim
  sumoverlap= ifelse(sumoverlap >= 1 , 1, sumoverlap) #adjusting for percent sometimes going over 1 because of overlaps within claim polygons.
)
claimed$claimre %>% unique() %>% length #61  
 

###### For an example see: 
# claimed %>% filter(Grid==21) %>%
#   dplyr::select(c('overlap_area', 'percent','sumoverlap', 'landmass'))  %>% View()
 
 

## now get it  to a unique gridcell table
tab= claimed %>% distinct( Grid,claimre, sumoverlap)
## to dup_exp add in
dupdat_exp$disparea <-tab$sumoverlap[match(interaction(dupdat_exp$gridid, dupdat_exp$claimre),
                                         interaction(tab$Grid, tab$claimre))]

###### for an example see: 
# shows that for claim No.1543 the area claimed is 22% of the polygon; for No.1530, 99.9%
# dupdat_exp %>% filter(gridid== 2595) %>%
#   dplyr::select(c( 'gridid','year','start', 'end',
#                   'disputed', 'check', 'claimre', 'disparea' ))  %>% View()
 
# turn NAs into 0 
dupdat_exp= dupdat_exp %>% mutate(
  disparea= ifelse(is.na(disparea)==T, 0, disparea)
) 

 
 
#################################
### Base-exp make ####
###############################


##create expanded base
reprow= rep(rownames(base), length(time)) %>% as.numeric()
base_exp= base[reprow, ]


#duplicate mark
base_exp= base_exp %>% group_by(gridid) %>% 
  mutate(dup= 1:n())
#transform to year
base_exp= base_exp %>% mutate(
  year= 1830+ dup -1
) 
base_exp= base_exp %>% arrange(gridid, year)
rm(reprow)

##### filter by area ####


dupdat_exp$disputed_thresh <- NULL
dupdat_exp$disputed <-NULL

## threshold for counting as "disputed": 20% of gridcell claimed
## this recodes 14.6% of 'disputed' gridcells as undisputed
## results hold robust when changing this threshold to 0.50 or 0.80. 

dupdat_exp= dupdat_exp %>% mutate(
  disputed_thresh= ifelse(check==1 & disparea>=0.20, 1, 0), 
  disputed_thresh50= ifelse(check==1 & disparea>=0.50, 1, 0), 
  disputed_thresh80= ifelse(check==1 & disparea>=0.80, 1, 0)
)
 
 
#code a gridcell to meet the dispute threshold 
# if a gridcell has one claim that disputes more than 20% of its area
dupdat_exp= dupdat_exp %>% group_by(gridid, year) %>% mutate(
  disputed_thresh= max(disputed_thresh),
  disputed_thresh50= max(disputed_thresh50),
  disputed_thresh80= max(disputed_thresh80)
)
 
 
##### Add dispute info into main data ####

base_exp$disputed <-NULL
base_exp$disputed= dupdat_exp$disputed_thresh[match(interaction(base_exp$gridid, base_exp$year),
                                        interaction(dupdat_exp$gridid, dupdat_exp$year))]
base_exp$disputed50= dupdat_exp$disputed_thresh50[match(interaction(base_exp$gridid, base_exp$year),
                                                    interaction(dupdat_exp$gridid, dupdat_exp$year))]
base_exp$disputed80= dupdat_exp$disputed_thresh80[match(interaction(base_exp$gridid, base_exp$year),
                                                    interaction(dupdat_exp$gridid, dupdat_exp$year))]
base_exp$disputed  %>% table (useNA = 'ifany')
# 20%: 121534  
base_exp$disputed50  %>% table (useNA = 'ifany')
# 50%: 107331
base_exp$disputed80  %>% table (useNA = 'ifany')
# 80%: 95504 
 
 
# code dispute initiation and termination 
base_exp= base_exp %>% group_by(gridid) %>% 
  mutate( 
    lagdis= lag(disputed, 1), 
    leaddis= lead(disputed, 1),
    init= ifelse(disputed==1 &lagdis==1, 2 ,0),
    init= ifelse(disputed==1 &lagdis==0 | leaddis==1 & disputed==1 & is.na(lagdis)==T,
                 1 ,init),
    init= replace_na(init, 0),
    init= ifelse(init==2, NA, init),
    ongoing= ifelse(is.na(init)==T, 1, 0)
  )
base_exp$init %>% table (useNA = 'ifany')
 # 1176034    2122  119412 

 
base_exp= base_exp %>% group_by(gridid) %>% 
  mutate(  
    term= ifelse(disputed==1, 0 ,NA),
    term= ifelse(disputed==0 &lagdis==1, 1 ,term)
  )
 

## previously settled disputes 
base_exp= base_exp %>% group_by(gridid) %>% 
  mutate(  
    termyear=  ifelse(term==1, year, NA),
    first_termyear= min(termyear, na.rm = T)
  )
base_exp= base_exp %>% group_by(gridid) %>% 
  mutate(  
  settled= ifelse(first_termyear<year & max(term, na.rm = T)==1, 1, 0),
  everdisputed= ifelse(max(disputed, na.rm = T)==1, 1,0)
  )
base_exp$settled %>% table (useNA = 'ifany') 
# 1116550  181018 

# ## see for example:
# base_exp %>% filter(gridid== 10) %>%
# dplyr::select(c( 'year','settled','disputed',
#                  'init', 'term', 'termyear', 'first_termyear'))  %>% View()

 
 
#---------------------------------------  

### Add in resource information to baseexp  #####

#---------------------------------------  

##### add in oil data ########
## load in oil data 
oildata <- read_excel("oildata.xlsx")
oildata = oildata %>% group_by(gridid) %>% mutate(
  mindisc = min(DISC)
) 
 
base_exp$mindisc <- oildata$mindisc[match(base_exp$gridid, oildata$gridid)]


### make oil timevarying  
base_exp= base_exp %>% group_by(gridid) %>% 
  mutate(
    oilpresent=  ifelse(year>=mindisc  , 1, 0),
    oilpresent= replace_na(oilpresent, 0)
    )

base_exp$oilpresent %>% table(useNA = 'ifany')
#1259718   37850 
 
 
### discovery of oil 
base_exp= base_exp %>% group_by(gridid) %>%
  mutate(
    oildisc=  ifelse(year==mindisc  , 1, 0),
    oildisc= replace_na(oildisc, 0),
    oildisc= ifelse(oilpresent==1 & oildisc==0, NA, oildisc))
 

##### add in mineral data ##### 

## usgs mineral data is already recorded in base data (merged in GIS)
## also has information on when each mineral became valuable

## create dummy for saltpeter and guano 
base_exp = base_exp %>% group_by(gridid) %>% mutate(
  saltpeter= ifelse(COMMODITY== 'Saltpeter'&year>=startmark &year <=endmakr, 1, 0),
  saltpeter= replace_na(saltpeter, 0),
  guano= ifelse(COMMODITY== 'Guano'&year>=startmark &year <=endmakr, 1, 0),
  guano= replace_na(guano, 0))

##   bring in info on coals
coalsheet  <- read_csv("coalsheet.csv")
base_exp$coals<- coalsheet$COM[match(base_exp$gridid, coalsheet$Grid)]
base_exp= base_exp %>% mutate(
  coals = ifelse(is.na(coals)==F, 1, 0))

 
 

### Minpres coding #####
base_exp$minpres <- NULL
base_exp$minpres_orig <- NULL

# create mineral presence variable 
base_exp= base_exp %>% group_by(gridid) %>% 
  mutate(
    ## all mining: usgs mineral data + coal + guano+ saltpeter
    minpres_all=  ifelse(year>=startmark &year <=endmakr |
    coals==1, 1, 0),
    minpres_all= replace_na(minpres_all, 0),  
 
    ## only usgs mineral data, without guano, coal, saltpeter 
    minpres_orig=  ifelse(year>=startmark &year <=endmakr &guano== 0 &saltpeter==0   , 1, 0),
    minpres_orig= replace_na(minpres_orig, 0)
    )
  

base_exp$minpres_all %>% table(useNA = 'ifany') #     63296  
base_exp$minpres_orig %>% table(useNA = 'ifany') # 20696 
base_exp$coals %>% table(useNA = 'ifany')   #  41452 

### discovery of minerals 
base_exp= base_exp %>% group_by(gridid) %>%
  mutate(
    mineraldisc=  ifelse(year==startmark  , 1, 0),
    mineraldisc= replace_na(mineraldisc, 0),
    mineraldisc= ifelse(minpres_all==1 & mineraldisc==0, NA, mineraldisc),
    resourcedisc = ifelse(mineraldisc==1 | oildisc==1 , 1 ,0))
 

## code if minerals or oil existed at any time period
base_exp %<>% group_by(gridid) %>% mutate( 
  ever_minerals = max(minpres_all, na.rm = T),
  ever_oil =  max(oilpresent, na.rm = T),
  ever_resource = ifelse(ever_minerals ==1 | ever_oil ==1, 1, 0)
)

#------------------------------------------
### create capital intensity value from NAICS  ######
#------------------------------------------

  base_exp = base_exp  %>% mutate(
    capint= ifelse(cropland==1, 0.17,0),
    capint= ifelse(forfish==1, 0.20,capint),
    capint= ifelse(minpres_all==1, 0.39,capint),
    capint= ifelse(oilpresent==1, 0.9,capint),
    
    capint_orig= ifelse(cropland==1, 0.17,0),
    capint_orig= ifelse(forfish==1, 0.20,capint_orig),
    capint_orig= ifelse(minpres_orig==1, 0.39,capint_orig),
    capint_orig= ifelse(oilpresent==1, 0.9,capint_orig) 
  )
  base_exp$capint %>% table(useNA = 'ifany')
  # 0    0.17     0.2    0.39     0.9 
  # 1508   19073 1179849   59288   37850 
 
  base_exp$capint_orig %>% table(useNA = 'ifany')
  #    0    0.17     0.2    0.39     0.9 
  #  1811   20823 1216956   20128   37850 


########  create ordinal variable (Capord)  ########
base_exp =base_exp  %>% mutate(
   capord= ifelse(minpres_all==1, 'Med', 'Low'),
   capord= ifelse(oilpresent==1, 'High', capord),
   capord_orig= ifelse(minpres_orig==1, 'Med', 'Low'),
   capord_orig= ifelse(oilpresent==1, 'High', capord_orig)
)
  


### Additional ways of coding: 

base_exp= base_exp %>% mutate(
## separate out areas without any resources
    capord_four = case_when(capint== 0 ~ 'None',
                         capint== 0.17 ~ 'Low',
                         capint== 0.20 ~ 'Low',
                         capint== 0.39 ~ 'Med',
                         capint== 0.9 ~ 'High'),
  ## binary value for either minerals or oil 
  capord_bin= ifelse(capord != 'Low', 1, 0)
  )


##### create lead values for later (anticipated discoveries) ########

base_exp= base_exp %>% group_by(gridid) %>% mutate(
  leadoil= lead(oilpresent, 10),
  leadcapord= lead(capord, 10)
)

a= base_exp %>% slice(which(year==1991)) %>% dplyr::select(gridid, leadcapord) %>% arrange(gridid)
base_exp$value91 <- a$leadcapord[match(base_exp$gridid, a$gridid)]  
base_exp= base_exp %>% group_by(gridid) %>% mutate(
  leadcapord_noNA = ifelse(is.na(leadcapord)==T, value91, leadcapord ) 
    ) 

## reorder factors  
base_exp =base_exp  %>% mutate( 
  capord= factor(capord, ordered = F, levels = c( 'Low','Med',  'High')), 
  capord_orig= factor(capord_orig, ordered = F, levels = c('Low', 'Med', 'High')),
  capord_four= factor(capord_four, ordered = F, levels = c('None','Low', 'Med', 'High')),
  leadcapord= factor(leadcapord, ordered = F, levels = c('Low', 'Med', 'High')),
  leadcapord_noNA= factor(leadcapord_noNA, ordered = F, levels = c('Low', 'Med', 'High'))
)


## check 
base_exp$capord %>% table
# 1200430   59288   37850 
base_exp$capord_orig %>% table
#  1239590   20128   37850 
base_exp$capord_four  %>% table(useNA = 'ifany')
# 1508 1198922   59288   37850 
base_exp$capord_bin  %>% table(useNA = 'ifany')
# 1200430   97138 
base_exp$leadoil  %>% table(useNA = 'ifany')
# 1184278   37850   75440 
base_exp$leadcapord  %>% table(useNA = 'ifany')
# 1128230   56048   37850   75440 
base_exp$leadcapord_noNA  %>% table(useNA = 'ifany')
# 1193020   59328   45220 

 
 
### add in information about distance to border  ####
## border distance 
borderdist <- read_csv("borderdist.csv")
borderdist$dist2<- borderdist$dist/1000
base_exp$borderdist <- borderdist$dist2[match(base_exp$gridid, borderdist$gridid)] 
base_exp$borderdist %>% summary
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# 0.0     0.0   104.3   348.6   512.6  2526.1 

## also put in for base data
base$borderdist <- borderdist$dist2[match(base$gridid, borderdist$gridid)] 




#-------------------------------------------------------------------------

## Add spatial information ####

#-------------------------------------------------------------------------


## call in spatial information 
spatial <- read_sf('rexport.shp', stringsAsFactors = F)
# leave only gridid and geometry (get rid of unnecessary varialbes)
geodat <- spatial %>%  dplyr::select(OBJECTID_2,geometry)

##pull out coordinates
separated_coord <- geodat %>%
  mutate(long = lapply(map(geodat$geometry,1), "[",1) %>% unlist,
         lat = lapply(map(geodat$geometry,1), "[",6) %>% unlist)

base_exp$lat <- separated_coord$lat[match(base_exp$gridid, separated_coord$OBJECTID_2)]
base_exp$long <- separated_coord$long[match(base_exp$gridid, separated_coord$OBJECTID_2)]
 

#####################################
### Add Terrain information ##########
#####################################

terrain <- read_excel("terrain.xlsx")

## organize data
## see the readme file for why there are multiple rows for some gridcells
terrain$COUNT <-NULL
counttab =terrain %>% group_by(Grid) %>% summarise( count= n(),
                                                    key= 1:n())
terrain$count = counttab$count[match(terrain$Grid, counttab$Grid)]

terrain2 <- terrain
terrain2 %<>% group_by(Grid) %>% mutate(
  terr_min= min(MIN, na.rm=T),
  terr_max= max(MAX, na.rm=T),
  terr_range  = terr_max-terr_min,
  terr_mean= mean(MEAN)
)

terrain2%<>%dplyr::select(Grid, terr_min, terr_max, terr_range, terr_mean, count)
terrain2 %<>% group_by(Grid) %>% slice(1)
terrain2$gridid <- terrain2$Grid


##merge into data
base_exp = left_join(base_exp, terrain2, by='gridid')
 

#---------------------------------------------------
## Information by decade ##########
#---------------------------------------------------

base_exp = base_exp %>% mutate(
  d1830= ifelse(year>=1830 & year<1840, 1, 0),
  d1840= ifelse(year>=1840 & year<1850, 1, 0),
  d1850= ifelse(year>=1850 & year<1860, 1, 0),
  d1860= ifelse(year>=1860 & year<1870, 1, 0),
  d1870= ifelse(year>=1870 & year<1880, 1, 0),
  d1880= ifelse(year>=1880 & year<1890, 1, 0),
  d1890= ifelse(year>=1890 & year<1900, 1, 0),
  d1900= ifelse(year>=1900 & year<1910, 1, 0),
  d1910= ifelse(year>=1910 & year<1920, 1, 0),
  d1920= ifelse(year>=1920 & year<1930, 1, 0),
  d1930= ifelse(year>=1930 & year<1940, 1, 0),
  d1940= ifelse(year>=1940 & year<1950, 1, 0),
  d1950= ifelse(year>=1950 & year<1960, 1, 0),
  d1960= ifelse(year>=1960 & year<1970, 1, 0),
  d1970= ifelse(year>=1970 & year<1980, 1, 0),
  d1980= ifelse(year>=1980 & year<1990, 1, 0),
  d1990= ifelse(year>=1990 , 1, 0)
)
 

## repeat for each decade 
##1830
a= base_exp %>%  filter(d1830==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1830oil <- a$doil[match(base$gridid, a$gridid)]
base$d1830init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1830min <- a$dmin[match(base$gridid, a$gridid)]
base$d1830min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1830ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1830init= ifelse(already==1, NA,d1830init  ), #already disputed cells NA
  d1830capint= ifelse(cropland==1, 0.17,0),
  d1830capint= ifelse(forfish==1, 0.20,d1830capint),
  d1830capint= ifelse(d1830min==1, 0.39,d1830capint),
  d1830capint= ifelse(d1830oil==1, 0.9,d1830capint),
  
  d1830capint2= ifelse(cropland==1, 0.17,0),
  d1830capint2= ifelse(forfish==1, 0.20,d1830capint2),
  d1830capint2= ifelse(d1830min_orig==1, 0.39,d1830capint2),
  d1830capint2= ifelse(d1830oil==1, 0.9,d1830capint2)
)
 

base= base %>% mutate(
  d1830capord= case_when( d1830capint == '0' | d1830capint =='0.17' | d1830capint =='0.2' ~ 'Low',
                          d1830capint== '0.39'~ 'Med',
                          d1830capint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  d1830capord_bin= case_when( d1830capint == '0' | d1830capint =='0.17' | d1830capint =='0.2' ~ 'Low',
                              d1830capint== '0.39'~ 'High',
                              d1830capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1830capord2= case_when( d1830capint2 == '0' | d1830capint2 =='0.17' | d1830capint2 =='0.2' ~ 'Low',
                           d1830capint2== '0.39'~ 'Med',
                           d1830capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1830capord_bin2= case_when( d1830capint2 == '0' | d1830capint2 =='0.17' | d1830capint2 =='0.2' ~ 'Low',
                               d1830capint2== '0.39'~ 'High',
                               d1830capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 


## 1840
a= base_exp %>%  filter(d1840==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1840oil <- a$doil[match(base$gridid, a$gridid)]
base$d1840init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1840min <- a$dmin[match(base$gridid, a$gridid)]
base$d1840min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1840ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1840init= ifelse(already==1, NA,d1840init  ), #already disputed cells NA
  d1840capint= ifelse(cropland==1, 0.17,0),
  d1840capint= ifelse(forfish==1, 0.20,d1840capint),
  d1840capint= ifelse(d1840min==1, 0.39,d1840capint),
  d1840capint= ifelse(d1840oil==1, 0.9,d1840capint),
  
  d1840capint2= ifelse(cropland==1, 0.17,0),
  d1840capint2= ifelse(forfish==1, 0.20,d1840capint2),
  d1840capint2= ifelse(d1840min_orig==1, 0.39,d1840capint2),
  d1840capint2= ifelse(d1840oil==1, 0.9,d1840capint2)
)


base= base %>% mutate(
  d1840capord= case_when( d1840capint == '0' | d1840capint =='0.17' | d1840capint =='0.2' ~ 'Low',
                          d1840capint== '0.39'~ 'Med',
                          d1840capint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  d1840capord_bin= case_when( d1840capint == '0' | d1840capint =='0.17' | d1840capint =='0.2' ~ 'Low',
                              d1840capint== '0.39'~ 'High',
                              d1840capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1840capord2= case_when( d1840capint2 == '0' | d1840capint2 =='0.17' | d1840capint2 =='0.2' ~ 'Low',
                           d1840capint2== '0.39'~ 'Med',
                           d1840capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1840capord_bin2= case_when( d1840capint2 == '0' | d1840capint2 =='0.17' | d1840capint2 =='0.2' ~ 'Low',
                               d1840capint2== '0.39'~ 'High',
                               d1840capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 

## 1850
a= base_exp %>%  filter(d1850==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1850oil <- a$doil[match(base$gridid, a$gridid)]
base$d1850init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1850min <- a$dmin[match(base$gridid, a$gridid)]
base$d1850min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1850ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1850init= ifelse(already==1, NA,d1850init  ), #already disputed cells NA
  d1850capint= ifelse(cropland==1, 0.17,0),
  d1850capint= ifelse(forfish==1, 0.20,d1850capint),
  d1850capint= ifelse(d1850min==1, 0.39,d1850capint),
  d1850capint= ifelse(d1850oil==1, 0.9,d1850capint),
  
  d1850capint2= ifelse(cropland==1, 0.17,0),
  d1850capint2= ifelse(forfish==1, 0.20,d1850capint2),
  d1850capint2= ifelse(d1850min_orig==1, 0.39,d1850capint2),
  d1850capint2= ifelse(d1850oil==1, 0.9,d1850capint2)
)



base= base %>% mutate(
  d1850capord= case_when( d1850capint == '0' | d1850capint =='0.17' | d1850capint =='0.2' ~ 'Low',
                          d1850capint== '0.39'~ 'Med',
                          d1850capint== '0.9' ~ 'High') %>% factor(levels = c('Low', "Med", 'High')),
  d1850capord_bin= case_when( d1850capint == '0' | d1850capint =='0.17' | d1850capint =='0.2' ~ 'Low',
                              d1850capint== '0.39'~ 'High',
                              d1850capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1850capord2= case_when( d1850capint2 == '0' | d1850capint2 =='0.17' | d1850capint2 =='0.2' ~ 'Low',
                           d1850capint2== '0.39'~ 'Med',
                           d1850capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1850capord_bin2= case_when( d1850capint2 == '0' | d1850capint2 =='0.17' | d1850capint2 =='0.2' ~ 'Low',
                               d1850capint2== '0.39'~ 'High',
                               d1850capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 
## 1860
a= base_exp %>%  filter(d1860==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1860oil <- a$doil[match(base$gridid, a$gridid)]
base$d1860init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1860min <- a$dmin[match(base$gridid, a$gridid)]
base$d1860min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1860ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1860init= ifelse(already==1, NA,d1860init  ), #already disputed cells NA
  d1860capint= ifelse(cropland==1, 0.17,0),
  d1860capint= ifelse(forfish==1, 0.20,d1860capint),
  d1860capint= ifelse(d1860min==1, 0.39,d1860capint),
  d1860capint= ifelse(d1860oil==1, 0.9,d1860capint),
  
  d1860capint2= ifelse(cropland==1, 0.17,0),
  d1860capint2= ifelse(forfish==1, 0.20,d1860capint2),
  d1860capint2= ifelse(d1860min_orig==1, 0.39,d1860capint2),
  d1860capint2= ifelse(d1860oil==1, 0.9,d1860capint2)
)


base= base %>% mutate(
  d1860capord= case_when( d1860capint == '0' | d1860capint =='0.17' | d1860capint =='0.2' ~ 'Low',
                          d1860capint== '0.39'~ 'Med',
                          d1860capint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  d1860capord_bin= case_when( d1860capint == '0' | d1860capint =='0.17' | d1860capint =='0.2' ~ 'Low',
                              d1860capint== '0.39'~ 'High',
                              d1860capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1860capord2= case_when( d1860capint2 == '0' | d1860capint2 =='0.17' | d1860capint2 =='0.2' ~ 'Low',
                           d1860capint2== '0.39'~ 'Med',
                           d1860capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1860capord_bin2= case_when( d1860capint2 == '0' | d1860capint2 =='0.17' | d1860capint2 =='0.2' ~ 'Low',
                               d1860capint2== '0.39'~ 'High',
                               d1860capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 



## 1870
a= base_exp %>%  filter(d1870==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1870oil <- a$doil[match(base$gridid, a$gridid)]
base$d1870init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1870min <- a$dmin[match(base$gridid, a$gridid)]
base$d1870min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1870ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1870init= ifelse(already==1, NA,d1870init  ), #already disputed cells NA
  d1870capint= ifelse(cropland==1, 0.17,0),
  d1870capint= ifelse(forfish==1, 0.20,d1870capint),
  d1870capint= ifelse(d1870min==1, 0.39,d1870capint),
  d1870capint= ifelse(d1870oil==1, 0.9,d1870capint),
  
  d1870capint2= ifelse(cropland==1, 0.17,0),
  d1870capint2= ifelse(forfish==1, 0.20,d1870capint2),
  d1870capint2= ifelse(d1870min_orig==1, 0.39,d1870capint2),
  d1870capint2= ifelse(d1870oil==1, 0.9,d1870capint2)
)


base= base %>% mutate(
  d1870capord= case_when( d1870capint == '0' | d1870capint =='0.17' | d1870capint =='0.2' ~ 'Low',
                          d1870capint== '0.39'~ 'Med',
                          d1870capint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  d1870capord_bin= case_when( d1870capint == '0' | d1870capint =='0.17' | d1870capint =='0.2' ~ 'Low',
                              d1870capint== '0.39'~ 'High',
                              d1870capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1870capord2= case_when( d1870capint2 == '0' | d1870capint2 =='0.17' | d1870capint2 =='0.2' ~ 'Low',
                           d1870capint2== '0.39'~ 'Med',
                           d1870capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1870capord_bin2= case_when( d1870capint2 == '0' | d1870capint2 =='0.17' | d1870capint2 =='0.2' ~ 'Low',
                               d1870capint2== '0.39'~ 'High',
                               d1870capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 

## 1880
a= base_exp %>%  filter(d1880==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1880oil <- a$doil[match(base$gridid, a$gridid)]
base$d1880init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1880min <- a$dmin[match(base$gridid, a$gridid)]
base$d1880min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1880ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1880init= ifelse(already==1, NA,d1880init  ), #already disputed cells NA
  d1880capint= ifelse(cropland==1, 0.17,0),
  d1880capint= ifelse(forfish==1, 0.20,d1880capint),
  d1880capint= ifelse(d1880min==1, 0.39,d1880capint),
  d1880capint= ifelse(d1880oil==1, 0.9,d1880capint),
  
  d1880capint2= ifelse(cropland==1, 0.17,0),
  d1880capint2= ifelse(forfish==1, 0.20,d1880capint2),
  d1880capint2= ifelse(d1880min_orig==1, 0.39,d1880capint2),
  d1880capint2= ifelse(d1880oil==1, 0.9,d1880capint2)
)



base= base %>% mutate(
  d1880capord= case_when( d1880capint == '0' | d1880capint =='0.17' | d1880capint =='0.2' ~ 'Low',
                          d1880capint== '0.39'~ 'Med',
                          d1880capint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  d1880capord_bin= case_when( d1880capint == '0' | d1880capint =='0.17' | d1880capint =='0.2' ~ 'Low',
                              d1880capint== '0.39'~ 'High',
                              d1880capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1880capord2= case_when( d1880capint2 == '0' | d1880capint2 =='0.17' | d1880capint2 =='0.2' ~ 'Low',
                           d1880capint2== '0.39'~ 'Med',
                           d1880capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1880capord_bin2= case_when( d1880capint2 == '0' | d1880capint2 =='0.17' | d1880capint2 =='0.2' ~ 'Low',
                               d1880capint2== '0.39'~ 'High',
                               d1880capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 

## 1890
a= base_exp %>%  filter(d1890==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1890oil <- a$doil[match(base$gridid, a$gridid)]
base$d1890init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1890min <- a$dmin[match(base$gridid, a$gridid)]
base$d1890min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1890ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1890init= ifelse(already==1, NA,d1890init  ), #already disputed cells NA
  d1890capint= ifelse(cropland==1, 0.17,0),
  d1890capint= ifelse(forfish==1, 0.20,d1890capint),
  d1890capint= ifelse(d1890min==1, 0.39,d1890capint),
  d1890capint= ifelse(d1890oil==1, 0.9,d1890capint),
  
  d1890capint2= ifelse(cropland==1, 0.17,0),
  d1890capint2= ifelse(forfish==1, 0.20,d1890capint2),
  d1890capint2= ifelse(d1890min_orig==1, 0.39,d1890capint2),
  d1890capint2= ifelse(d1890oil==1, 0.9,d1890capint2)
)


base= base %>% mutate(
  d1890capord= case_when( d1890capint == '0' | d1890capint =='0.17' | d1890capint =='0.2' ~ 'Low',
                          d1890capint== '0.39'~ 'Med',
                          d1890capint== '0.9' ~ 'High') %>% factor(levels = c('Low', "Med", 'High')),
  d1890capord_bin= case_when( d1890capint == '0' | d1890capint =='0.17' | d1890capint =='0.2' ~ 'Low',
                              d1890capint== '0.39'~ 'High',
                              d1890capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1890capord2= case_when( d1890capint2 == '0' | d1890capint2 =='0.17' | d1890capint2 =='0.2' ~ 'Low',
                           d1890capint2== '0.39'~ 'Med',
                           d1890capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1890capord_bin2= case_when( d1890capint2 == '0' | d1890capint2 =='0.17' | d1890capint2 =='0.2' ~ 'Low',
                               d1890capint2== '0.39'~ 'High',
                               d1890capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 

## 1900
a= base_exp %>%  filter(d1900==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1900oil <- a$doil[match(base$gridid, a$gridid)]
base$d1900init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1900min <- a$dmin[match(base$gridid, a$gridid)]
base$d1900min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1900ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1900init= ifelse(already==1, NA,d1900init  ), #already disputed cells NA
  d1900capint= ifelse(cropland==1, 0.17,0),
  d1900capint= ifelse(forfish==1, 0.20,d1900capint),
  d1900capint= ifelse(d1900min==1, 0.39,d1900capint),
  d1900capint= ifelse(d1900oil==1, 0.9,d1900capint),
  
  d1900capint2= ifelse(cropland==1, 0.17,0),
  d1900capint2= ifelse(forfish==1, 0.20,d1900capint2),
  d1900capint2= ifelse(d1900min_orig==1, 0.39,d1900capint2),
  d1900capint2= ifelse(d1900oil==1, 0.9,d1900capint2)
)


base= base %>% mutate(
  d1900capord= case_when( d1900capint == '0' | d1900capint =='0.17' | d1900capint =='0.2' ~ 'Low',
                          d1900capint== '0.39'~ 'Med',
                          d1900capint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  d1900capord_bin= case_when( d1900capint == '0' | d1900capint =='0.17' | d1900capint =='0.2' ~ 'Low',
                              d1900capint== '0.39'~ 'High',
                              d1900capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1900capord2= case_when( d1900capint2 == '0' | d1900capint2 =='0.17' | d1900capint2 =='0.2' ~ 'Low',
                           d1900capint2== '0.39'~ 'Med',
                           d1900capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1900capord_bin2= case_when( d1900capint2 == '0' | d1900capint2 =='0.17' | d1900capint2 =='0.2' ~ 'Low',
                               d1900capint2== '0.39'~ 'High',
                               d1900capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)
 


## 1910
a= base_exp %>%  filter(d1910==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1910oil <- a$doil[match(base$gridid, a$gridid)]
base$d1910init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1910min <- a$dmin[match(base$gridid, a$gridid)]
base$d1910min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1910ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1910init= ifelse(already==1, NA,d1910init  ), #already disputed cells NA
  d1910capint= ifelse(cropland==1, 0.17,0),
  d1910capint= ifelse(forfish==1, 0.20,d1910capint),
  d1910capint= ifelse(d1910min==1, 0.39,d1910capint),
  d1910capint= ifelse(d1910oil==1, 0.9,d1910capint),
  
  d1910capint2= ifelse(cropland==1, 0.17,0),
  d1910capint2= ifelse(forfish==1, 0.20,d1910capint2),
  d1910capint2= ifelse(d1910min_orig==1, 0.39,d1910capint2),
  d1910capint2= ifelse(d1910oil==1, 0.9,d1910capint2)
)


base= base %>% mutate(
  d1910capord= case_when( d1910capint == '0' | d1910capint =='0.17' | d1910capint =='0.2' ~ 'Low',
                          d1910capint== '0.39'~ 'Med',
                          d1910capint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  d1910capord_bin= case_when( d1910capint == '0' | d1910capint =='0.17' | d1910capint =='0.2' ~ 'Low',
                              d1910capint== '0.39'~ 'High',
                              d1910capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1910capord2= case_when( d1910capint2 == '0' | d1910capint2 =='0.17' | d1910capint2 =='0.2' ~ 'Low',
                           d1910capint2== '0.39'~ 'Med',
                           d1910capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1910capord_bin2= case_when( d1910capint2 == '0' | d1910capint2 =='0.17' | d1910capint2 =='0.2' ~ 'Low',
                               d1910capint2== '0.39'~ 'High',
                               d1910capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)
 

##1920
a= base_exp %>%  filter(d1920==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1920oil <- a$doil[match(base$gridid, a$gridid)]
base$d1920init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1920min <- a$dmin[match(base$gridid, a$gridid)]
base$d1920min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1920ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1920init= ifelse(already==1, NA,d1920init  ), #already disputed cells NA
  d1920capint= ifelse(cropland==1, 0.17,0),
  d1920capint= ifelse(forfish==1, 0.20,d1920capint),
  d1920capint= ifelse(d1920min==1, 0.39,d1920capint),
  d1920capint= ifelse(d1920oil==1, 0.9,d1920capint),
  
  d1920capint2= ifelse(cropland==1, 0.17,0),
  d1920capint2= ifelse(forfish==1, 0.20,d1920capint2),
  d1920capint2= ifelse(d1920min_orig==1, 0.39,d1920capint2),
  d1920capint2= ifelse(d1920oil==1, 0.9,d1920capint2)
)


base= base %>% mutate(
  d1920capord= case_when( d1920capint == '0' | d1920capint =='0.17' | d1920capint =='0.2' ~ 'Low',
                          d1920capint== '0.39'~ 'Med',
                          d1920capint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  d1920capord_bin= case_when( d1920capint == '0' | d1920capint =='0.17' | d1920capint =='0.2' ~ 'Low',
                              d1920capint== '0.39'~ 'High',
                              d1920capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1920capord2= case_when( d1920capint2 == '0' | d1920capint2 =='0.17' | d1920capint2 =='0.2' ~ 'Low',
                           d1920capint2== '0.39'~ 'Med',
                           d1920capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1920capord_bin2= case_when( d1920capint2 == '0' | d1920capint2 =='0.17' | d1920capint2 =='0.2' ~ 'Low',
                               d1920capint2== '0.39'~ 'High',
                               d1920capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 


##1930
a= base_exp %>%  filter(d1930==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1930oil <- a$doil[match(base$gridid, a$gridid)]
base$d1930init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1930min <- a$dmin[match(base$gridid, a$gridid)]
base$d1930min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1930ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1930init= ifelse(already==1, NA,d1930init  ), #already disputed cells NA
  d1930capint= ifelse(cropland==1, 0.17,0),
  d1930capint= ifelse(forfish==1, 0.20,d1930capint),
  d1930capint= ifelse(d1930min==1, 0.39,d1930capint),
  d1930capint= ifelse(d1930oil==1, 0.9,d1930capint),
  
  d1930capint2= ifelse(cropland==1, 0.17,0),
  d1930capint2= ifelse(forfish==1, 0.20,d1930capint2),
  d1930capint2= ifelse(d1930min_orig==1, 0.39,d1930capint2),
  d1930capint2= ifelse(d1930oil==1, 0.9,d1930capint2)
)



base= base %>% mutate(
  d1930capord= case_when( d1930capint == '0' | d1930capint =='0.17' | d1930capint =='0.2' ~ 'Low',
                          d1930capint== '0.39'~ 'Med',
                          d1930capint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  d1930capord_bin= case_when( d1930capint == '0' | d1930capint =='0.17' | d1930capint =='0.2' ~ 'Low',
                              d1930capint== '0.39'~ 'High',
                              d1930capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1930capord2= case_when( d1930capint2 == '0' | d1930capint2 =='0.17' | d1930capint2 =='0.2' ~ 'Low',
                           d1930capint2== '0.39'~ 'Med',
                           d1930capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1930capord_bin2= case_when( d1930capint2 == '0' | d1930capint2 =='0.17' | d1930capint2 =='0.2' ~ 'Low',
                               d1930capint2== '0.39'~ 'High',
                               d1930capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 


##1940
a= base_exp %>%  filter(d1940==1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1940oil <- a$doil[match(base$gridid, a$gridid)]
base$d1940init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1940min <- a$dmin[match(base$gridid, a$gridid)]
base$d1940min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1940ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1940init= ifelse(already==1, NA,d1940init  ), #already disputed cells NA
  d1940capint= ifelse(cropland==1, 0.17,0),
  d1940capint= ifelse(forfish==1, 0.20,d1940capint),
  d1940capint= ifelse(d1940min==1, 0.39,d1940capint),
  d1940capint= ifelse(d1940oil==1, 0.9,d1940capint),
  
  d1940capint2= ifelse(cropland==1, 0.17,0),
  d1940capint2= ifelse(forfish==1, 0.20,d1940capint2),
  d1940capint2= ifelse(d1940min_orig==1, 0.39,d1940capint2),
  d1940capint2= ifelse(d1940oil==1, 0.9,d1940capint2)
)


base= base %>% mutate(
  d1940capord= case_when( d1940capint == '0' | d1940capint =='0.17' | d1940capint =='0.2' ~ 'Low',
                          d1940capint== '0.39'~ 'Med',
                          d1940capint== '0.9' ~ 'High') %>% factor(levels = c('Low', "Med", 'High')),
  d1940capord_bin= case_when( d1940capint == '0' | d1940capint =='0.17' | d1940capint =='0.2' ~ 'Low',
                              d1940capint== '0.39'~ 'High',
                              d1940capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1940capord2= case_when( d1940capint2 == '0' | d1940capint2 =='0.17' | d1940capint2 =='0.2' ~ 'Low',
                           d1940capint2== '0.39'~ 'Med',
                           d1940capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1940capord_bin2= case_when( d1940capint2 == '0' | d1940capint2 =='0.17' | d1940capint2 =='0.2' ~ 'Low',
                               d1940capint2== '0.39'~ 'High',
                               d1940capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 

##1950
a= base_exp %>%  filter(d1950>=1) %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$d1950oil <- a$doil[match(base$gridid, a$gridid)]
base$d1950init <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$d1950min <- a$dmin[match(base$gridid, a$gridid)]
base$d1950min_orig <- a$dmin_nc[match(base$gridid, a$gridid)]
base$d1950ddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  d1950init= ifelse(already==1, NA,d1950init  ), #already disputed cells NA
  d1950capint= ifelse(cropland==1, 0.17,0),
  d1950capint= ifelse(forfish==1, 0.20,d1950capint),
  d1950capint= ifelse(d1950min==1, 0.39,d1950capint),
  d1950capint= ifelse(d1950oil==1, 0.9,d1950capint),
  
  d1950capint2= ifelse(cropland==1, 0.17,0),
  d1950capint2= ifelse(forfish==1, 0.20,d1950capint2),
  d1950capint2= ifelse(d1950min_orig==1, 0.39,d1950capint2),
  d1950capint2= ifelse(d1950oil==1, 0.9,d1950capint2)
)


base= base %>% mutate(
  d1950capord= case_when( d1950capint == '0' | d1950capint =='0.17' | d1950capint =='0.2' ~ 'Low',
                          d1950capint== '0.39'~ 'Med',
                          d1950capint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  d1950capord_bin= case_when( d1950capint == '0' | d1950capint =='0.17' | d1950capint =='0.2' ~ 'Low',
                              d1950capint== '0.39'~ 'High',
                              d1950capint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  d1950capord2= case_when( d1950capint2 == '0' | d1950capint2 =='0.17' | d1950capint2 =='0.2' ~ 'Low',
                           d1950capint2== '0.39'~ 'Med',
                           d1950capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  d1950capord_bin2= case_when( d1950capint2 == '0' | d1950capint2 =='0.17' | d1950capint2 =='0.2' ~ 'Low',
                               d1950capint2== '0.39'~ 'High',
                               d1950capint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 
##all
a= base_exp %>%  group_by(gridid) %>%mutate(
  doil= ifelse(oilpresent==1, 1, 0),
  doil= max(doil),
  init= replace_na(init, 0),
  dinit= max(init),
  already= ifelse(disputed==1 &dinit ==0, 1, 0),
  dmin= ifelse(minpres_all==1 , 1, 0),
  dmin= max(dmin),
  dmin_nc=ifelse(minpres_orig==1 |is.na(cstartmark)==F, 1, 0),
  dmin_nc= max(dmin_nc),
  ddisp= ifelse(disputed==1, 1, 0),
  ddisp= max(ddisp)
)
base$dalloil <- a$doil[match(base$gridid, a$gridid)]
base$dallinit <- a$dinit[match(base$gridid, a$gridid)]
base$already <- a$already[match(base$gridid, a$gridid)]
base$dallmin <- a$dmin[match(base$gridid, a$gridid)]
base$dallmin_nc <- a$dmin_nc[match(base$gridid, a$gridid)]
base$dallddisp <- a$ddisp[match(base$gridid, a$gridid)]
base = base %>% mutate(
  dallinit= ifelse(already==1, NA,dallinit  ), #already disputed cells NA
  dallcapint= ifelse(cropland==1, 0.17,0),
  dallcapint= ifelse(forfish==1, 0.20,dallcapint),
  dallcapint= ifelse(dallmin==1, 0.39,dallcapint),
  dallcapint= ifelse(dalloil==1, 0.9,dallcapint),
  
  dallcapint2= ifelse(cropland==1, 0.17,0),
  dallcapint2= ifelse(forfish==1, 0.20,dallcapint2),
  dallcapint2= ifelse(dallmin_nc==1, 0.39,dallcapint2),
  dallcapint2= ifelse(dalloil==1, 0.9,dallcapint2)
)

base= base %>% mutate(
  dallcapord= case_when( dallcapint == '0' | dallcapint =='0.17' | dallcapint =='0.2' ~ 'Low',
                         dallcapint== '0.39'~ 'Med',
                         dallcapint== '0.9' ~ 'High') %>%  factor(levels = c('Low', "Med", 'High')),
  dallcapord_bin= case_when( dallcapint == '0' | dallcapint =='0.17' | dallcapint =='0.2' ~ 'Low',
                             dallcapint== '0.39'~ 'High',
                             dallcapint== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  
  dallcapord2= case_when( dallcapint2 == '0' | dallcapint2 =='0.17' | dallcapint2 =='0.2' ~ 'Low',
                          dallcapint2== '0.39'~ 'Med',
                          dallcapint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low'),
  dallcapord_bin2= case_when( dallcapint2 == '0' | dallcapint2 =='0.17' | dallcapint2 =='0.2' ~ 'Low',
                              dallcapint2== '0.39'~ 'High',
                              dallcapint2== '0.9' ~ 'High') %>% as.factor %>% relevel(ref = 'Low')
)

 


# ##  For example: 
# decade<- str_c('d',seq(1830, 1950, 10))
# decade
# base %>% filter(gridid== 7511) %>% 
#   dplyr::select(str_c(decade,'ddisp'), 'dispute') %>% t 
 

### data with only high risk gridcells 
uoabord <- base %>% filter(uoabord==1)





#------------------------------------------------------------------------------
##########################  Save final data ############################### 

write.csv(base_exp,  'main-gridyear.csv')
write.csv(base,  'main-grid.csv')
# save.image()
#------------------------------------------------------------------------------


#------------------------------------------------------------------------------
####### Below: 
####### Codes used to make the dyad-grid-year data used in appendix  ########### 
#------------------------------------------------------------------------------
 

#-----------------------------------------------
## First, make dyad-year data without gridcell info 
#----------------------------------------------

dyaddat <- read_csv("country_dyad.csv")
dyaddat %<>% mutate(
  dyadid= ifelse(ccodeA <ccodeB, ccodeA*1000+ccodeB, ccodeB*1000+ccodeA)
)

##now expand 
time= 1830:2001
dat= expand.grid(dyaddat$dyadid, time)  
dat = dat %>% 
  rename('dyadid' =  Var1, 'year'= Var2) %>% arrange(dyadid)
dat= left_join(dat, dyaddat, by='dyadid')

#######  add CINC values  ########
## read in NMC data
nmc <- read_csv("NMC-60-abridged.csv")

dat$cincA = nmc$cinc[match(interaction(dat$ccodeA, dat$year), interaction(nmc$ccode, nmc$year))]
dat$cincB = nmc$cinc[match(interaction(dat$ccodeB, dat$year), interaction(nmc$ccode, nmc$year))]
dat$bop= dat$cincA/(dat$cincA + dat$cincB)

dat= dat %>% group_by(dyadid) %>% mutate(
  lagcincA = dplyr::lag(cincA, 1),
  lagcincB = dplyr::lag(cincB, 1),
  lagbop = dplyr::lag(bop, 1),
  changebop = abs(bop-lagbop)/lagbop,
  changebop_log = log(abs(bop-lagbop)/lagbop),
  tencincA = ifelse(abs(cincA-lagcincA) > lagcincA*0.1, 1, 0),
  tencincB = ifelse(abs(cincB-lagcincB) > lagcincB*0.1, 1, 0),
  tenchange = ifelse(tencincA ==1 | tencincB==1, 1, 0)
)


#### adjusting for colonial occupation:
## replace UK for Guyana and NTH for Suriname
ukcinc <- nmc %>% filter( year>= 1830 & year<2002 &ccode==200)  %>% pull(cinc)
nthcinc1 <- nmc %>% filter(year>= 1830 & year<1941 & ccode==210)  %>% pull(cinc)
nthcinc2 <- nmc %>% filter(year>= 1945 & year<2002 & ccode==210)  %>% pull(cinc)
nthcinc = c(nthcinc1, rep(NA,4), nthcinc2) #nths CINC missing 1941 1942 1943 1944

dat$colcinc<-NULL
dat %<>% mutate(
  colcinc_uk = case_when(ccodeA ==110 ~ ukcinc,
                         ccodeB ==110 ~ukcinc,
                         T ~ NA),
  colcinc_nth = case_when(ccodeB==115 ~nthcinc,
                          ccodeA ==115 ~nthcinc,
                          T ~ NA)
)


## keep original CINC values 
dat$cincorgA <- dat$cincA
dat$cincorgB <- dat$cincB
##replace NA values with colonial values 
dat %<>% mutate(
  cincA = ifelse(ccodeA ==110 & year <1966, colcinc_uk, 
                 ifelse(ccodeA ==115 & year <1975, colcinc_nth, cincA)),
  cincB =   ifelse(ccodeB ==115 & year <1975, colcinc_nth, cincB)
)
rm(nmc)

#-----------------------------------------------------
####### Add in Gridcell information  ##########
#-----------------------------------------------------

###### (1) Data on Historically Claimable Gridcells ######  
overlapinfo <- read_csv("overlapinfo3.csv")
overlapinfo$gridid <- overlapinfo$Grid
overlapinfo$Grid <- NULL
overlapinfo$`OBJECTID *` <- NULL
overlapinfo %<>% dplyr::select('gridid', everything()) 

## for each dyad, see what gridcells are historically claimable 
m100101= cbind (
  dyadid= 100101,
  gridid= subset(overlapinfo,  `100`== 1 &  `101` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `100`== 1 &  `101` ==1) %>% pull(claimants) %>% unique

m100130= cbind (
  dyadid= 100130,
  gridid= subset(overlapinfo,  `100`== 1 &  `130` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `100`== 1 &  `130` ==1) %>% pull(claimants) %>% unique

m100135= cbind (
  dyadid= 100135,
  gridid= subset(overlapinfo,  `100`== 1 &  `135` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `100`== 1 &  `135` ==1) %>% pull(claimants) %>% unique
## if none, remove matrix
rm(m100135) 

m110101= cbind (
  dyadid= 110101,
  gridid= subset(overlapinfo,  `110`== 1 &  `101` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `110`== 1 &  `101` ==1) %>% pull(claimants) %>% unique

m110115= cbind (
  dyadid= 110115,
  gridid= subset(overlapinfo,  `110`== 1 &  `115` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `110`== 1 &  `115` ==1) %>% pull(claimants) %>% unique

m130135= cbind (
  dyadid= 130135,
  gridid= subset(overlapinfo,  `130`== 1 &  `135` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `130`== 1 &  `135` ==1) %>% pull(claimants) %>% unique


m140100= cbind (
  dyadid= 140100,
  gridid= subset(overlapinfo,  `140`== 1 &  `100` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `140`== 1 &  `100` ==1) %>% pull(claimants) %>% unique

m140101= cbind (
  dyadid= 140101,
  gridid= subset(overlapinfo,  `140`== 1 &  `101` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `140`== 1 &  `101` ==1) %>% pull(claimants) %>% unique
## if none, remove matrix
rm(m140101)


m140110= cbind (
  dyadid= 140110,
  gridid= subset(overlapinfo,  `140`== 1 &  `110` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `140`== 1 &  `110` ==1) %>% pull(claimants) %>% unique
## if none, remove matrix
rm(m140110)


m140115= cbind (
  dyadid= 140115,
  gridid= subset(overlapinfo,  `140`== 1 &  `115` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `140`== 1 &  `115` ==1) %>% pull(claimants) %>% unique


m140135= cbind (
  dyadid= 140135,
  gridid= subset(overlapinfo,  `140`== 1 &  `135` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `140`== 1 &  `135` ==1) %>% pull(claimants) %>% unique


m140150= cbind (
  dyadid= 140150,
  gridid= subset(overlapinfo,  `140`== 1 &  `150` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `140`== 1 &  `150` ==1) %>% pull(claimants) %>% unique

m140165= cbind (
  dyadid= 140165,
  gridid= subset(overlapinfo,  `140`== 1 &  `165` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `140`== 1 &  `165` ==1) %>% pull(claimants) %>% unique


m140220= cbind (
  dyadid= 140220,
  gridid= subset(overlapinfo,  `140`== 1 &  `220` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `140`== 1 &  `220` ==1) %>% pull(claimants) %>% unique
## if none, remove matrix
rm(m140220)

m145135= cbind (
  dyadid= 145135,
  gridid= subset(overlapinfo,  `145`== 1 &  `135` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `145`== 1 &  `135` ==1) %>% pull(claimants) %>% unique 


m145140= cbind (
  dyadid= 145140,
  gridid= subset(overlapinfo,  `145`== 1 &  `140` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `145`== 1 &  `140` ==1) %>% pull(claimants) %>% unique 


m145150= cbind (
  dyadid= 145150,
  gridid= subset(overlapinfo,  `145`== 1 &  `150` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `145`== 1 &  `150` ==1) %>% pull(claimants) %>% unique 


m145155= cbind (
  dyadid= 145155,
  gridid= subset(overlapinfo,  `145`== 1 &  `155` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `145`== 1 &  `155` ==1) %>% pull(claimants) %>% unique 
## if none, remove matrix
rm(m145155)


m155135= cbind (
  dyadid= 155135,
  gridid= subset(overlapinfo,  `135`== 1 &  `155` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `135`== 1 &  `155` ==1) %>% pull(claimants) %>% unique 


m160140= cbind (
  dyadid= 160140,
  gridid= subset(overlapinfo,  `160`== 1 &  `140` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `160`== 1 &  `140` ==1) %>% pull(claimants) %>% unique 

m160145= cbind (
  dyadid= 160145,
  gridid= subset(overlapinfo,  `160`== 1 &  `145` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `160`== 1 &  `145` ==1) %>% pull(claimants) %>% unique  


m160150= cbind (
  dyadid= 160150,
  gridid= subset(overlapinfo,  `160`== 1 &  `150` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `160`== 1 &  `150` ==1) %>% pull(claimants) %>% unique  


m160155= cbind (
  dyadid= 160155,
  gridid= subset(overlapinfo,  `160`== 1 &  `155` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `160`== 1 &  `155` ==1) %>% pull(claimants) %>% unique  

m160165= cbind (
  dyadid= 160165,
  gridid= subset(overlapinfo,  `160`== 1 &  `165` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `160`== 1 &  `165` ==1) %>% pull(claimants) %>% unique  


m220115= cbind (
  dyadid= 220115,
  gridid= subset(overlapinfo,  `220`== 1 &  `115` ==1) %>% pull(gridid) %>% unique) 
subset(overlapinfo,  `220`== 1 &  `115` ==1) %>% pull(claimants) %>% unique 

combined = rbind(m100101, m100130, 
                 m110101, m110115,
                 m130135,
                 m140100, m140115, m140135, m140150, m140165,
                 m145135, m145140, m145150, 
                 m155135, m160140, m160145, m160150, m160155, m160165, m220115 ) %>% as.data.frame()

combined= combined %>% mutate(
  codeA= substr(dyadid, 1,3) %>% as.numeric(),
  codeB= substr(dyadid, 4,6) %>% as.numeric,
  newid= ifelse(codeA < codeB, codeA*1000+codeB, codeB*1000+codeA)
)
combined$newid %>% unique
combined$dyadid <- combined$newid
combined$newid <- NULL



###### (2) Add data on claimable border cells ########## 


borderoverlap <- read_excel("borderoverlap.xlsx")
borderoverlap %<>%  rename(gridid = Grid) %>% arrange(gridid)
borderoverlap = borderoverlap[ ,-c(1,2)]
dim(borderoverlap)

## now get cells with multiple countries (border cells)
borderoverlap %<>% group_by(gridid) %>% mutate(
  dups= n()
)
borderoverlap = borderoverlap %>% filter(dups>1)
borderoverlap$empty = 1

borderwide= borderoverlap %>% pivot_wider(names_from = ccode, values_from = empty)  
borderwide = na_replace(borderwide, 0)

#code in information about which countries the gridcell belongs to 
borderwide %<>% group_by(gridid) %>% mutate (
  across(`155`:`220`, function(x) max(x,na.rm=T) ))
borders= borderwide %>% group_by(gridid) %>% slice(1) #2506

## put this in dyad format 
m100101= cbind (
  dyadid= 100101,
  gridid= subset(borders,  `100`== 1 &  `101` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `100`== 1 &  `101` ==1) %>% pull(country) %>% unique

m100130= cbind (
  dyadid= 100130,
  gridid= subset(borders,  `100`== 1 &  `130` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `100`== 1 &  `130` ==1) %>% pull(country) %>% unique

m100135= cbind (
  dyadid= 100135,
  gridid= subset(borders,  `100`== 1 &  `135` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `100`== 1 &  `135` ==1) %>% pull(country) %>% unique

m110101= cbind (
  dyadid= 110101,
  gridid= subset(borders,  `110`== 1 &  `101` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `110`== 1 &  `101` ==1) %>% pull(country) %>% unique

m110115= cbind (
  dyadid= 110115,
  gridid= subset(borders,  `110`== 1 &  `115` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `110`== 1 &  `115` ==1) %>% pull(country) %>% unique

m130135= cbind (
  dyadid= 130135,
  gridid= subset(borders,  `130`== 1 &  `135` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `130`== 1 &  `135` ==1) %>% pull(country) %>% unique


m140100= cbind (
  dyadid= 140100,
  gridid= subset(borders,  `140`== 1 &  `100` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `140`== 1 &  `100` ==1) %>% pull(country) %>% unique

m140101= cbind (
  dyadid= 140101,
  gridid= subset(borders,  `140`== 1 &  `101` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `140`== 1 &  `101` ==1) %>% pull(country) %>% unique


m140110= cbind (
  dyadid= 140110,
  gridid= subset(borders,  `140`== 1 &  `110` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `140`== 1 &  `110` ==1) %>% pull(country) %>% unique


m140115= cbind (
  dyadid= 140115,
  gridid= subset(borders,  `140`== 1 &  `115` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `140`== 1 &  `115` ==1) %>% pull(country) %>% unique


m140135= cbind (
  dyadid= 140135,
  gridid= subset(borders,  `140`== 1 &  `135` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `140`== 1 &  `135` ==1) %>% pull(country) %>% unique

m140150= cbind (
  dyadid= 140150,
  gridid= subset(borders,  `140`== 1 &  `150` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `140`== 1 &  `150` ==1) %>% pull(country) %>% unique


m140165= cbind (
  dyadid= 140165,
  gridid= subset(borders,  `140`== 1 &  `165` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `140`== 1 &  `165` ==1) %>% pull(country) %>% unique



m140220= cbind (
  dyadid= 140220,
  gridid= subset(borders,  `140`== 1 &  `220` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `140`== 1 &  `220` ==1) %>% pull(country) %>% unique


m145135= cbind (
  dyadid= 145135,
  gridid= subset(borders,  `145`== 1 &  `135` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `145`== 1 &  `135` ==1) %>% pull(country) %>% unique 


m145140= cbind (
  dyadid= 145140,
  gridid= subset(borders,  `145`== 1 &  `140` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `145`== 1 &  `140` ==1) %>% pull(country) %>% unique 


m145150= cbind (
  dyadid= 145150,
  gridid= subset(borders,  `145`== 1 &  `150` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `145`== 1 &  `150` ==1) %>% pull(country) %>% unique 


m145155= cbind (
  dyadid= 145155,
  gridid= subset(borders,  `145`== 1 &  `155` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `145`== 1 &  `155` ==1) %>% pull(country) %>% unique 



m155135= cbind (
  dyadid= 155135,
  gridid= subset(borders,  `135`== 1 &  `155` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `135`== 1 &  `155` ==1) %>% pull(country) %>% unique 


m160140= cbind (
  dyadid= 160140,
  gridid= subset(borders,  `160`== 1 &  `140` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `160`== 1 &  `140` ==1) %>% pull(country) %>% unique 

m160145= cbind (
  dyadid= 160145,
  gridid= subset(borders,  `160`== 1 &  `145` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `160`== 1 &  `145` ==1) %>% pull(country) %>% unique  


m160150= cbind (
  dyadid= 160150,
  gridid= subset(borders,  `160`== 1 &  `150` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `160`== 1 &  `150` ==1) %>% pull(country) %>% unique  


m160155= cbind (
  dyadid= 160155,
  gridid= subset(borders,  `160`== 1 &  `155` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `160`== 1 &  `155` ==1) %>% pull(country) %>% unique  

m160165= cbind (
  dyadid= 160165,
  gridid= subset(borders,  `160`== 1 &  `165` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `160`== 1 &  `165` ==1) %>% pull(country) %>% unique  


m220115= cbind (
  dyadid= 220115,
  gridid= subset(borders,  `220`== 1 &  `115` ==1) %>% pull(gridid) %>% unique) 
subset(borders,  `220`== 1 &  `115` ==1) %>% pull(country) %>% unique 

combined2 = rbind(m100101, m100130, 
                  m100135,
                  m110101, m110115,
                  m130135,
                  m140100, m140101, m140110, m140115, m140135, m140150, m140165,
                  m140220,
                  m145135, m145140, m145150, 
                  m145155,
                  m155135, m160140, m160145, m160150, m160155, m160165, m220115 ) %>% as.data.frame()
combined2= combined2 %>% mutate(
  codeA= substr(dyadid, 1,3) %>% as.numeric(),
  codeB= substr(dyadid, 4,6) %>% as.numeric,
  newid= ifelse(codeA < codeB, codeA*1000+codeB, codeB*1000+codeA)
)
combined2$newid %>% unique
combined2$dyadid <- combined2$newid
combined2$newid <- NULL

#----------------------------------------------
### Create Dyad-Grid data (grids) ######
#----------------------------------------------

## now append the two data frames
## this will give us data on claimable dyads based on historical + border claims 

grids = rbind(combined, combined2)
grids$dygrid = paste0(grids$dyadid,'-' ,grids$gridid)

# remove overlaps between combined and combined 2 
grids %<>% group_by(gridid, dyadid) %>% slice(1)
grids %<>% arrange(gridid)

write.csv(grids, 'dyad-grid.csv')

#----------------------------------------------


######## Expand into panel data ################
## unit of analysis: dyad-grid-year 
## already have the gridcells potentially claimable by dyad
## with expand grid, expanding it to include years  


panel = expand.grid('dygrid' = grids$dygrid, 'year'= time) 
panel = panel %>%  arrange('dygrid', 'year')
panel=  left_join(panel, grids, by='dygrid')



##### Add dispute information #########

### info on disputed gridcells
gridclaim <- read_excel("gridclaim_multijoin.xlsx")
gridclaim %<>% rename(gridid= Grid)

#expanding grid data
gridclaim = gridclaim %>% mutate(
  begclaim = as.numeric(begclaim),
  endclaim= as.numeric(endclaim)
)
gridclaim$OBJECTID = 1:nrow(gridclaim)


gridclaim= gridclaim %>% group_by(gridid) %>%
  mutate(count= n())

## make appropriate duplicates

time= seq(1830, 2001, 1)
reprow= rep(rownames(gridclaim), length(time)) %>% as.numeric()
gridclaim_exp= gridclaim[reprow, ]
dim(gridclaim_exp)
#1736168   

#duplicate mark (not by grid but by how many there are =objectid, not gridid)

gridclaim_exp= gridclaim_exp %>% arrange(OBJECTID) %>%  group_by(OBJECTID) %>% 
  mutate(dup= 1:n())
#transform to year
gridclaim_exp= gridclaim_exp %>% mutate(
  year= 1830+ dup -1
) 

gridclaim_exp %<>%  dplyr::select('gridid', 'year', everything()) %>%  arrange(gridid, year)


#### populate expanded data (gridclaim) #####
## put in multiple disputes accordingly 
gridclaim_exp= gridclaim_exp %>% mutate(
  dyadid= ifelse(chal <tgt, chal*1000+tgt, tgt*1000+chal)
)




##now change these European dyads into current country claims
#save original dyad
gridclaim_exp$dyadid_orig <- gridclaim_exp$dyadid
# replace with european 
gridclaim_exp %<>% mutate(
  dyadid = case_when(dyadid_orig == 101200 ~ 101110, #essequibo
                     dyadid_orig == 110210 ~ 110115, #suriname: corentyn,
                     dyadid_orig == 140200 ~  110140, #bra-ukg pirara
                     dyadid_orig == 140210 ~ 115140, #bra-nth tumuc-humac
                     dyadid_orig ==  210220 ~ 115220, #sur-fra Maroni  
                     T~ dyadid_orig)
)
## not changing information on disputes that were actually with european powers
# 135230 (Chincha), 101210 (Los roques), 160200(falklands), 2130 (galapagos) 



gridclaim_exp= gridclaim_exp %>% group_by(gridid) %>% 
  mutate(
    start= ifelse(begclaim== year, 1, 0),
    end= ifelse(endclaim== year, 1, 0),
    check= ifelse(year>=begclaim & year <= endclaim, 1, 0),
    check= replace_na(check, 0))

gridclaim_exp= gridclaim_exp %>% group_by(gridid, year, dyadid) %>% 
  mutate(disputed = max(check, na.rm = T))  

gridclaim_exp %<>% arrange(gridid,dyadid,year)
gridclaim_exp$disputed %>% table (useNA = 'ifany')
# 314647



#### making gridclaim_sliced ######

##now slice to keep only one observation per gridcell-year-dyad 
## this slices off some claim numberes
## but data on whether or not the gridcell was disputed in the given year remains valid 
gridclaim_sliced = gridclaim_exp %>% group_by(gridid,year, dyadid) %>% slice(1)
gridclaim_sliced%<>% arrange(gridid,dyadid,year)


## merge dispute data into dyad-year data 
panel2 = left_join(panel, gridclaim_sliced, by= c('dyadid', 'year' , 'gridid')) 
panel2$disputed  = replace_na(panel2$disputed, 0)



#----------------------------------------------
###### Add info into empty panel data ###########
#----------------------------------------------


####  resources data #######

toy= base_exp %>% dplyr::select(c('gridid','year' , 'capord', 'capord_bin'))
panel2$capord = toy$capord[match(interaction(panel2$gridid, panel2$year), 
                                 interaction(toy$gridid, toy$year))]
panel2$capord_bin = toy$capord_bin[match(interaction(panel2$gridid, panel2$year), 
                                         interaction(toy$gridid, toy$year))] 

#### regime information (VDEM) #####

### Call in Vdem
vdem <- read_csv("V-Dem-CY-Full+Others-v13.csv")
vdem %<>% dplyr::select('country_name', 'country_text_id', 'country_id', 'year', 
                        'codingstart', 'codingend', 'COWcode', 
                        'e_regiongeo' , 'e_regionpol', 'v2x_freexp_altinf', 'v2x_libdem',
                        tidyselect::vars_select(names(vdem), starts_with('v2x_suffr', ignore.case = TRUE)), 
                        tidyselect::vars_select(names(vdem), starts_with('v2regoppgroupssize', ignore.case = TRUE)), 
                        tidyselect::vars_select(names(vdem), starts_with('v2psoppaut', ignore.case = TRUE)),
                        tidyselect::vars_select(names(vdem), starts_with('v2pepwrses', ignore.case = TRUE)),
                        tidyselect::vars_select(names(vdem), starts_with('v2mecrit', ignore.case = TRUE))) 

## also save south america vdem data for figure G1 later
vdem_SA = vdem %>% filter(e_regiongeo== 18 | e_regiongeo== 16 | country_name =='France')
vdem_SA %<>% filter(country_name!= 'United States of America' & country_name != 'Canada') 
write.csv(vdem_SA, 'vdem_SA.csv')


dat$soceq_A = vdem$v2pepwrses_osp[match(interaction(dat$ccodeA, dat$year), 
                                        interaction(vdem$COWcode, vdem$year))]
dat$soceq_B = vdem$v2pepwrses_osp[match(interaction(dat$ccodeB, dat$year), 
                                        interaction(vdem$COWcode, vdem$year))]
dat$oppaut_A = vdem$v2psoppaut_osp[match(interaction(dat$ccodeA, dat$year), 
                                         interaction(vdem$COWcode, vdem$year))]
dat$oppaut_B = vdem$v2psoppaut_osp[match(interaction(dat$ccodeB, dat$year), 
                                         interaction(vdem$COWcode, vdem$year))] 

# check = vdem %>% filter(country_name== 'Colombia' & year>=1830)  #okay 
dat =  dat %>% group_by(dyadid, year) %>% mutate(
  soceq_min = min(soceq_A, soceq_B, na.rm = T),
  soceq_av =  (soceq_A+soceq_B)/2,
  oppaut_min = min(oppaut_A, oppaut_B, na.rm = T),
  oppaut_av =  (oppaut_A+oppaut_B)/2
)
dat[sapply(dat, is.infinite)] <-NA
dat[sapply(dat, is.nan)] <-NA


## check keys
dat$dyadid %>% unique %>% sort
panel2$dyadid %>% unique %>% sort

## make final data 
fin = left_join(panel2, dat, by= c('dyadid', 'year') )


### Make sure gridcell is only coded as disputed when the relevant dyad is disputing it: 

fin = fin %>% mutate(
  realdisp = ifelse((fin$chal == fin$ccodeA | fin$chal == fin$ccodeB) & 
                      (fin$tgt == fin$ccodeA | fin$tgt ==fin$ccodeB), disputed, 0  ) %>%  replace_na(0),
  init = ifelse((fin$chal == fin$ccodeA | fin$chal == fin$ccodeB) & 
                  (fin$tgt == fin$ccodeA | fin$tgt ==fin$ccodeB), start, 0  ) 
)
fin$realdisp %>% table(useNA = 'ifany')
# 93254

fin = fin %>% group_by(dyadid, gridid) %>% mutate(
  realinit= ifelse(realdisp==0, 0, init),
  realinit= ifelse(realdisp==1 &init==0 | realdisp==1 & init ==1 & lag(realdisp)==1 , NA, realinit)
)
fin$realinit %>% table(useNA = 'ifany')
# 1251614    1249   92005 
fin$capord = factor(fin$capord, levels = c('Low', 'Med', 'High') )






## add in previously disputed and resolved claims 

fin= fin %>% group_by(dyadid, gridid) %>%
  mutate(
    ongoing= ifelse(is.na(realinit)==T, 1, 0),
    term= ifelse(realdisp==1 &dplyr::lead(realdisp,1)==0, 1 , ifelse(realdisp==1, 0 ,NA)),
    termyear=  ifelse(term==1, year, NA),
    first_termyear= min(termyear, na.rm = T)
  )
fin$first_termyear[is.infinite(fin$first_termyear)] <- NA

fin= fin %>% group_by(dyadid, gridid) %>%
  mutate(
    prevclaimed= ifelse(first_termyear<year & max(term, na.rm = T)==1, 1, 0)
  )


### european dyad code 
fin %<>% mutate(
  colonial = ifelse(dyadid== 101110 |  dyadid== 110115 |
                      dyadid== 110140| dyadid== 115140 |
                      dyadid== 115220 | dyadid== 140220, 1,0)
)


## more than 20% cinc change in either A or B 
fin = fin %>% group_by(dyadid) %>% mutate(
  twentycincA2 = ifelse(abs(cincA-lagcincA) > lagcincA*0.2, 1, 0),
  twentycincB2 = ifelse(abs(cincB-lagcincB) > lagcincB*0.2, 1, 0),
  twentychange = ifelse(twentycincA2 ==1 | twentycincB2==1, 1, 0),
  bopimbal =  abs((cincA/(cincA+cincB)) -0.5)    )

### save final data


#------------------------------------------------------------------------------
#####################  Save final data for Appendix  ########################### 

write.csv(fin, "dyad-year-gridcell.csv")
# save.image()

## Go to rscript-io-analysis for actual table and figure replication

############################  END of script ################################### 
#------------------------------------------------------------------------------

